pwd()
"C:\\Users\\user"
using Pkg
using LightGraphs
using MatrixNetworks
using VegaDatasets
using DataFrames
using SparseArrays
using LinearAlgebra
using Plots
using VegaLite
airports = dataset("airports")
| iata | name | city | state | country | latitude | longitude |
|---|---|---|---|---|---|---|
| "00M" | "Thigpen" | "Bay Springs" | "MS" | "USA" | 31.9538 | -89.2345 |
| "00R" | "Livingston Municipal" | "Livingston" | "TX" | "USA" | 30.6859 | -95.0179 |
| "00V" | "Meadow Lake" | "Colorado Springs" | "CO" | "USA" | 38.9457 | -104.57 |
| "01G" | "Perry-Warsaw" | "Perry" | "NY" | "USA" | 42.7413 | -78.0521 |
| "01J" | "Hilliard Airpark" | "Hilliard" | "FL" | "USA" | 30.688 | -81.9059 |
| "01M" | "Tishomingo County" | "Belmont" | "MS" | "USA" | 34.4917 | -88.2011 |
| "02A" | "Gragg-Wade" | "Clanton" | "AL" | "USA" | 32.8505 | -86.6115 |
| "02C" | "Capitol" | "Brookfield" | "WI" | "USA" | 43.0875 | -88.1779 |
| "02G" | "Columbiana County" | "East Liverpool" | "OH" | "USA" | 40.6733 | -80.6414 |
| "03D" | "Memphis Memorial" | "Memphis" | "MO" | "USA" | 40.4473 | -92.227 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
... with 3366 more rows.
typeof(airports)
VegaDatasets.VegaDataset
describe(DataFrame(airports))
| variable | mean | min | median | max | |
|---|---|---|---|---|---|
| Symbol | Union… | Any | Union… | Any | |
| 1 | iata | 00M | ZZV | ||
| 2 | name | Abbeville Chris Crusta Memorial | Zephyrhills Municipal | ||
| 3 | city | Abbeville | Zuni | ||
| 4 | state | AK | WY | ||
| 5 | country | Federated States of Micronesia | USA | ||
| 6 | latitude | 40.0365 | 7.36722 | 39.4344 | 71.2854 |
| 7 | longitude | -98.6212 | -176.646 | -93.5994 | 145.621 |
flightsairport = dataset("flights-airport")
| origin | destination | count |
|---|---|---|
| "ABE" | "ATL" | 853 |
| "ABE" | "BHM" | 1 |
| "ABE" | "CLE" | 805 |
| "ABE" | "CLT" | 465 |
| "ABE" | "CVG" | 247 |
| "ABE" | "DTW" | 997 |
| "ABE" | "JFK" | 3 |
| "ABE" | "LGA" | 9 |
| "ABE" | "ORD" | 1425 |
| "ABE" | "PHL" | 2 |
| ⋮ | ⋮ | ⋮ |
... with 5356 more rows.
describe(DataFrame(flightsairport))
| variable | mean | min | median | max | nmissing | eltype | |
|---|---|---|---|---|---|---|---|
| Symbol | Union… | Any | Union… | Any | Int64 | DataType | |
| 1 | origin | ABE | YUM | 0 | String | ||
| 2 | destination | ABE | YUM | 0 | String | ||
| 3 | count | 1306.32 | 1 | 777.0 | 13788 | 0 | Int64 |
names(DataFrame(flightsairport))
3-element Vector{String}:
"origin"
"destination"
"count"
typeof(flightsairport)
VegaDatasets.VegaDataset
flightsairportdf = DataFrame(flightsairport)
| origin | destination | count | |
|---|---|---|---|
| String | String | Int64 | |
| 1 | ABE | ATL | 853 |
| 2 | ABE | BHM | 1 |
| 3 | ABE | CLE | 805 |
| 4 | ABE | CLT | 465 |
| 5 | ABE | CVG | 247 |
| 6 | ABE | DTW | 997 |
| 7 | ABE | JFK | 3 |
| 8 | ABE | LGA | 9 |
| 9 | ABE | ORD | 1425 |
| 10 | ABE | PHL | 2 |
| 11 | ABI | DFW | 2660 |
| 12 | ABQ | AMA | 368 |
| 13 | ABQ | ATL | 1067 |
| 14 | ABQ | AUS | 433 |
| 15 | ABQ | BWI | 546 |
| 16 | ABQ | CLE | 12 |
| 17 | ABQ | CVG | 260 |
| 18 | ABQ | DAL | 3078 |
| 19 | ABQ | DEN | 4318 |
| 20 | ABQ | DFW | 2888 |
| 21 | ABQ | ELP | 799 |
| 22 | ABQ | EWR | 167 |
| 23 | ABQ | HOU | 1011 |
| 24 | ABQ | IAD | 365 |
| 25 | ABQ | IAH | 2261 |
| 26 | ABQ | LAS | 2402 |
| 27 | ABQ | LAX | 2395 |
| 28 | ABQ | LBB | 366 |
| 29 | ABQ | MAF | 367 |
| 30 | ABQ | MCI | 670 |
| ⋮ | ⋮ | ⋮ | ⋮ |
allairports = vcat(flightsairportdf[!, :origin], flightsairportdf[!, :destination])
uairports = unique(allairports)
305-element Vector{String}:
"ABE"
"ABI"
"ABQ"
"ABY"
"ACK"
"ACT"
"ACV"
"ACY"
"ADK"
"ADQ"
"AEX"
"AGS"
"AKN"
⋮
"TYR"
"TYS"
"VLD"
"VPS"
"WRG"
"WYS"
"XNA"
"YAK"
"YKM"
"YUM"
"CYS"
"OGD"
# create ab airports data frame that has a subset of airports that are only included in the routes dataset
airportsdf = DataFrame(airports)
subsetairports = map(i -> findfirst(airportsdf[!, :iata] .== uairports[i]), 1:length(uairports))
airportsdf_subset = airportsdf[subsetairports, :]
| iata | name | city | state | country | |
|---|---|---|---|---|---|
| String | String | String | String | String | |
| 1 | ABE | Lehigh Valley International | Allentown | PA | USA |
| 2 | ABI | Abilene Regional | Abilene | TX | USA |
| 3 | ABQ | Albuquerque International | Albuquerque | NM | USA |
| 4 | ABY | Southwest Georgia Regional | Albany | GA | USA |
| 5 | ACK | Nantucket Memorial | Nantucket | MA | USA |
| 6 | ACT | Waco Regional | Waco | TX | USA |
| 7 | ACV | Arcata | Arcata/Eureka | CA | USA |
| 8 | ACY | Atlantic City International | Atlantic City | NJ | USA |
| 9 | ADK | Adak | Adak | AK | USA |
| 10 | ADQ | Kodiak | Kodiak | AK | USA |
| 11 | AEX | Alexandria International | Alexandria | LA | USA |
| 12 | AGS | Bush | Augusta | GA | USA |
| 13 | AKN | King Salmon | King Salmon | AK | USA |
| 14 | ALB | Albany Cty | Albany | NY | USA |
| 15 | ALO | Waterloo Municipal | Waterloo | IA | USA |
| 16 | AMA | Amarillo International | Amarillo | TX | USA |
| 17 | ANC | Ted Stevens Anchorage International | Anchorage | AK | USA |
| 18 | ASE | Aspen-Pitkin Co/Sardy | Aspen | CO | USA |
| 19 | ATL | William B Hartsfield-Atlanta Intl | Atlanta | GA | USA |
| 20 | ATW | Outagamie County Regional | Appleton | WI | USA |
| 21 | AUS | Austin-Bergstrom International | Austin | TX | USA |
| 22 | AVL | Asheville Regional | Asheville | NC | USA |
| 23 | AVP | Wilkes-Barre/Scranton Intl | Wilkes-Barre/Scranton | PA | USA |
| 24 | AZO | Kalamazoo County | Kalamazoo | MI | USA |
| 25 | BDL | Bradley International | Windsor Locks | CT | USA |
| 26 | BET | Bethel | Bethel | AK | USA |
| 27 | BFL | Meadows | Bakersfield | CA | USA |
| 28 | BGM | Binghamton Regional | Binghamton | NY | USA |
| 29 | BGR | Bangor International | Bangor | ME | USA |
| 30 | BHM | Birmingham International | Birmingham | AL | USA |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
# 인접행렬 만들기(Build the adjacency matrix)
ei_ids = findfirst.(isequal.(flightsairportdf[!, :origin]), [uairports])
5366-element Vector{Int64}:
1
1
1
1
1
1
1
1
1
1
2
3
3
⋮
300
300
300
301
301
302
303
303
303
303
303
303
ej_ids = findfirst.(isequal.(flightsairportdf[!, :destination]), [uairports])
5366-element Vector{Int64}:
19
30
62
64
74
88
151
163
211
220
82
16
19
⋮
240
261
268
54
152
268
115
143
156
158
221
268
edgeweights = flightsairportdf[!, :count]
5366-element Vector{Int64}:
853
1
805
465
247
997
3
9
1425
2
2660
368
1067
⋮
1
1
4
363
362
340
1
326
99
1044
1961
440
A = sparse(ei_ids, ej_ids, 1, length(uairports), length(uairports))
305×305 SparseMatrixCSC{Int64, Int64} with 5366 stored entries:
⠀⠀⣾⠀⠁⡀⡂⠄⡃⢘⢚⣈⢒⢀⠀⠀⠀⢒⠀⢸⡒⡅⠐⣒⠀⠠⡒⢚⢚⠀⠀⠀⠐⡰⠰⠖⠀⡒⠀⠀
⠚⠛⢾⠓⠲⡒⡖⠓⠖⢺⢻⢻⢺⡶⠂⠗⠒⠿⠖⢺⣿⠖⠒⠶⠶⠚⡟⠻⢿⠒⠐⠗⢖⢛⢺⡒⠒⠞⠒⠆
⠠⠀⣸⠢⠪⠂⠧⠰⠷⢺⢹⠸⢸⠰⠲⠢⠁⢻⠄⠸⠷⠇⠐⢿⠾⠀⡷⢺⠾⠇⠘⠷⠴⠸⢸⠸⠄⠇⠀⠀
⠈⠌⢼⠁⠉⡓⠁⠀⡉⢸⢹⣸⢘⡘⠀⠈⠀⢹⠋⢉⠻⡋⠈⢻⢉⢀⡋⢹⠉⠉⠉⠉⢋⢸⠼⠉⠀⠋⠀⠄
⣫⢈⣸⣁⣹⣃⣃⣈⣏⣸⣹⣹⣘⢍⡀⣚⠀⣼⡃⣘⣿⣏⢸⣻⣻⢈⡏⣿⣻⡍⢈⣋⣫⣨⣸⣝⢙⣋⣁⡀
⡺⢐⣿⣒⣓⡒⣓⣒⣗⣺⣺⣺⣛⢒⡒⣖⠒⣻⣂⣒⣻⣾⢚⣳⣺⢒⡗⣻⣲⡒⢒⣖⣚⣐⣲⢚⣐⣛⡒⡒
⠘⢐⢺⡒⢒⡒⣒⠐⡖⢘⢻⢘⢀⠑⠀⡒⠐⣛⣂⢒⣿⡖⢰⣚⣓⠰⣆⢿⢻⣃⣒⣓⡺⢺⢸⢺⢒⡚⠆⠢
⠀⠀⢬⠀⠐⡀⠀⠀⡀⢨⢸⢬⢀⠠⠀⠁⠤⢠⠀⢀⠳⡃⠀⣩⣨⠨⡗⢺⠺⠀⠀⡒⠑⠸⠸⠂⠀⠝⠀⠀
⢠⢀⣼⡀⣤⣠⣌⣀⣀⣤⣼⣠⣴⢠⠀⣀⢀⣴⡀⣼⣤⣢⣠⣤⣀⡀⣧⣼⣤⣄⣀⣀⣤⢠⣸⣤⣀⣤⣀⡀
⢀⢀⣸⣡⣀⡁⡇⢈⣁⢩⢨⢸⢨⢘⠀⠀⣀⣬⡄⢨⣻⣁⢈⣿⡉⠀⣧⣩⣊⡁⣄⣁⣋⢈⣩⣈⣀⣃⡀⡃
⡘⠨⢻⠛⠽⠇⡻⠠⡿⣿⣿⢾⢻⠿⠽⠢⠠⣻⠟⢺⣎⠝⠘⣿⡿⠺⣿⢿⣿⠇⠋⠿⢿⢻⢻⣿⠄⠿⠬⠇
⢰⢠⢸⡄⣼⣄⣦⣀⣶⣲⢾⣰⣰⢰⡄⣠⠀⣾⣧⣴⣶⣴⢠⡶⣶⣐⣗⣾⣶⣦⢤⣦⣶⢰⣸⣲⠠⣶⣄⡀
⠂⠀⢸⠣⠚⠃⠃⠀⡛⢘⢺⣚⢘⠚⠂⡚⠀⠸⠃⠈⣻⡃⢘⢻⢐⠑⡇⣾⢙⡃⠈⡫⢚⢸⢘⠘⠁⠓⠁⠁
⣸⢈⣿⡉⣽⣃⣏⣈⣏⣭⣽⣭⣬⣝⣹⣍⣉⣿⡉⣻⣿⣻⣹⣽⣩⣩⣵⣿⣿⣁⣉⣹⣹⣹⣹⣯⣈⣹⣁⡁
⠘⠐⢻⠓⠚⠇⠇⠀⡟⠺⢸⠺⠻⢲⠘⠖⠀⢻⠊⠸⠿⠟⠸⣿⠷⠘⠟⢻⠾⠃⠓⠛⠞⢺⢸⠓⠚⡗⡀⠂
⠀⠀⢰⠆⢴⡆⡆⠀⡦⢱⢸⢰⢸⢹⢀⠅⠀⢸⠄⢸⣯⡄⠠⣷⡤⢀⣇⣹⣽⡀⠀⡀⢰⢸⢸⡢⠀⢆⠋⠄
⢐⡀⣾⢑⣐⡣⣋⣐⡋⣺⢚⢸⣚⣎⢑⡀⠀⣛⡋⢘⣷⣓⢘⣛⣟⣰⣗⣺⣻⣁⣀⣒⢐⣲⣰⣚⠀⣓⡀⠀
⢰⠆⢺⠲⣚⡒⡖⠃⣖⢾⣸⢚⢲⢒⠳⠂⠒⣾⡂⢺⣿⣶⢲⣺⣒⠐⡷⣾⢶⠓⠳⡲⣰⢺⣺⣾⠂⣲⠗⠒
⢠⠠⣸⠆⠤⠇⠴⠀⡧⢰⣴⢰⣸⠰⢄⡄⠀⣴⠤⢸⣤⡅⢠⣦⠥⢀⣆⣰⢾⠤⡠⢄⢤⢠⢨⣠⠠⠆⠁⡀
⠀⠀⠸⠀⠌⠀⠠⠄⠁⠼⠸⠨⠨⠄⠀⠄⠀⠸⠠⠠⠦⠇⠈⠹⠄⠀⠅⠸⠠⠀⠂⠄⠀⠀⠼⠀⠁⠀⠀⠀
A = max.(A, A')
305×305 SparseMatrixCSC{Int64, Int64} with 5668 stored entries:
⠀⠀⣾⠀⠁⡂⡂⠄⡋⢚⢚⣊⢒⢀⠀⠀⠀⢒⠀⢸⡒⡍⠐⣒⠈⠠⡒⢚⢚⠀⠀⠀⠐⡰⠰⠖⠀⡒⠀⠀
⠚⠛⢾⠓⠲⡚⡖⠓⠖⢺⢻⢻⢺⡶⠂⠗⠒⠿⠖⣺⣿⠖⠒⠶⠶⡚⡟⠻⢿⠒⠰⠗⢞⢛⢺⡒⠲⠞⠒⠆
⠡⠠⣸⠢⠪⠂⢧⠰⠷⢺⢹⠸⢸⠰⠲⠢⠁⣻⠄⠸⠷⠇⠒⢿⠾⠀⡷⢻⠾⠇⠸⠷⠴⡸⢺⠸⠤⠇⠂⠁
⠈⠌⢼⠉⢉⡓⠁⠀⡉⢸⢹⣸⢘⡘⠀⠈⠂⢹⡋⢉⠻⡋⠈⢻⢉⢀⡋⢹⠉⠉⠉⠉⢋⢸⠼⠉⠐⠋⠀⠆
⣫⢈⣸⣁⣹⣃⣃⣈⣏⣹⣹⣹⣘⢍⡀⣚⠀⣼⡇⣘⣿⣯⢸⣻⣻⢈⡏⣿⣻⡍⢌⣋⣫⣨⣸⣝⢙⣋⣁⡄
⡺⢰⣿⣒⣓⡒⣓⣲⣗⣺⣺⣺⣛⢒⡒⣖⠒⣻⣂⣒⣻⣿⢚⣳⣺⢲⡗⣿⣲⡒⢒⣖⣚⣐⣲⢚⣐⣛⡒⡒
⠘⢐⢺⡶⢒⡒⣒⠰⡖⢜⢻⢘⢄⠑⠀⡒⠐⣛⣂⢒⣿⡖⢰⣚⣳⠰⣆⢿⢻⣃⣖⣓⡺⢾⢸⢺⢒⡚⠆⠦
⠀⠀⢬⠄⠸⡂⡀⠀⣠⢨⢸⢬⢠⠠⠄⠁⠤⢠⠀⢀⠳⡃⠀⣩⣨⠨⡗⢾⢺⠄⠄⡖⠑⠸⠹⠂⠀⠽⠀⠄
⢠⢀⣼⡄⣥⣠⣌⣀⣀⣤⣼⣠⣴⢠⠀⣃⢀⣴⡀⣼⣤⣢⣠⣤⣀⡀⣧⣼⣤⣄⣀⣀⣤⢠⣸⣤⣀⣤⣀⡀
⣀⣀⣸⣡⣀⡁⡏⢈⣉⢩⢨⢸⢨⢘⠀⢀⣀⣬⡄⣩⣻⣁⢉⣿⡉⠀⣧⣩⣊⡁⣄⣁⣋⢈⣩⣈⣀⣃⡀⡃
⡜⠬⢻⠟⠽⠇⡿⠢⡿⣿⣿⣾⢻⠿⠽⠢⠠⣻⠟⢺⣎⠝⢘⣿⡿⠺⣿⣿⣿⠇⠋⠿⢿⢻⢻⣿⠄⠿⠬⠇
⢰⢠⢸⡄⣼⣄⣦⣀⣶⣲⢾⣰⣰⢲⡄⣠⠀⣾⣧⣴⣶⣴⢠⡶⣶⣐⣗⣾⣶⣦⢤⣦⣶⢰⣸⣲⠠⣶⣆⡀
⠂⡀⣸⠣⠚⠃⠃⢐⡛⢚⢺⣚⢙⡚⡂⡚⠀⠸⠃⠈⣻⡋⢘⢻⢔⠑⡇⣾⣙⡃⠈⣫⢛⣹⢘⠘⠁⢓⠁⠁
⣸⢈⣿⡉⣽⣋⣏⣈⣯⣭⣽⣭⣬⣝⣹⣍⣉⣿⡍⣻⣿⣿⣹⣽⣩⣭⣵⣿⣿⣁⣍⣹⣹⣹⣹⣯⣈⣹⣁⡁
⠚⠐⢻⠓⠾⠇⡇⠀⡟⠾⢸⠺⠿⢲⠚⠖⠀⢿⠎⠸⠿⠟⠸⣿⠷⠸⠟⢻⠾⠃⠓⠻⠟⢺⢼⠓⠚⡗⡀⠂
⠀⠀⢴⠆⢶⡆⡇⠀⡦⢱⢸⢴⢼⢹⢠⠥⠀⢸⠄⢹⣯⡄⠠⣷⡦⣠⣇⣹⣽⡀⠀⡠⢰⢸⢹⡢⠀⢎⠋⠄
⢐⡠⣾⢑⣐⡣⣋⣐⡋⣺⢚⢸⣺⣎⣑⡀⠀⣛⡋⢘⣿⣓⢘⣛⣟⣰⣗⣺⣻⣁⣐⣒⢰⣲⣰⣚⠀⣓⡀⠀
⢰⠆⢺⠲⣚⡒⡖⠃⣖⢾⣸⢚⣲⣒⠳⠂⠒⣾⡃⢺⣿⣶⢲⣺⣒⠐⡷⣾⢶⠓⠳⡲⣰⢺⣺⣾⠂⣲⠗⠓
⢠⠠⣸⠆⠤⠇⡴⠀⡷⢰⣴⢸⣸⠰⣄⡄⠀⣼⠤⢸⣤⡅⢠⣦⢥⢀⣆⣸⢾⠤⡠⢄⢤⢠⢨⣠⠠⠆⠁⡀
⠀⠀⠸⠄⠌⠀⠠⠄⠁⠼⢸⠨⠨⡅⠀⠄⠀⠸⠤⠨⠦⠇⠈⠹⠅⠀⠅⠸⠠⠈⠋⠄⠀⠈⢽⠁⠁⠠⠀⠀
spy(A) # sparse 행렬을 그래프로 보여줌
issymmetric(A)
true
L = SimpleGraph(A) # A type representing an undirected graph.
{305, 2834} undirected simple Int64 graph
# SimpleGraph 실습
G = SimpleGraph(10)
add_edge!(G, 7, 5)
add_edge!(G, 3, 5)
add_edge!(G, 5, 2)
true
cc = scomponents(A)
MatrixNetworks.Strong_components_output([1, 1, 1, 1, 1, 1, 1, 1, 1, 1 … 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [305], 1, MatrixNetwork{Int64}(305, [1, 13, 15, 56, 57, 60, 61, 67, 70, 71 … 5619, 5626, 5628, 5629, 5656, 5658, 5659, 5665, 5667, 5669], [19, 30, 62, 64, 74, 88, 151, 163, 168, 188 … 115, 143, 156, 158, 221, 268, 81, 268, 101, 268], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1 … 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]))
# the degree distribution
degrees = sum(A, dims = 2)[:]
p1 = plot(sort(degrees, rev = true), ylabel = "log degree", legend = false, yaxis =:log)
p2 = plot(sort(degrees, rev = true), ylabel = "degree", legend = false)
plot(p1, p2, size = (600, 300))
maxdegreeid = argmax(degrees)
19
uairports[maxdegreeid]
"ATL"
Colon before a variable name creates a symbol
(Symbols) The
:character has two syntactic purposes in Julia. The first form creates a Symbol, an interned string used as one building-block of expressions::green과 같이
:이 앞에 붙은 문자열은 Symbol(심볼) 타입이다. Symbol 타입은 문자열보다 효율적이며 ID나 키로 사용할 수 있다. Symbol은 서로 붙일 수 없으며 프로그램 전체에서 상수로 사용할 때 정의한다.(데이터 프레임의 컬럼명(열 헤더) 또한 Symbol 이다.)Julia 프로그래밍 (에이콘 출판사, 2015)
us10m = dataset("us-10m")
Vega JSON Dataset
@vlplot(width = 500, height = 300) +
@vlplot(
mark = {
:geoshape,
fill =:lightgray,
feature =:states
},
data = {
values = us10m,
format = {
type=:topojson,
feature=:states
}
},
projection = {type =:albersUsa},
) +
@vlplot(
:circle,
data = airportsdf_subset,
projection={type=:albersUsa},
longitude = "longitude:q",
latitude = "latitude:q",
size ={value = 10},
color = {value =:steelblue}
) +
@vlplot(
:rule,
data=flightsairport,
transform=[
{filter = {field=:origin, equal=:ATL}},
{
lookup=:origin,
from={
data = airportsdf_subset,
key=:iata,
fields=["latitude", "longitude"]
},
as=["origin_latitude", "origin_longitude"]
},
{
lookup=:destination,
from={
data=airportsdf_subset,
key=:iata,
fields=["latitude", "longitude"]
},
as = ["dest_latitude", "dest_longitude"]
}
],
projection = {type=:albersUsa},
longitude="origin_longitude:q",
latitude="origin_latitude:q",
longitude2="dest_longitude:q",
latitude2="dest_latitude:q"
)
ATL_paths = dijkstra(A, maxdegreeid)
([1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0 … 1.0, 1.0, 3.0, 2.0, 1.0, 3.0, 2.0, 2.0, 2.0, 2.0], [19, 82, 19, 19, 151, 82, 270, 19, 17, 17 … 19, 19, 154, 268, 19, 54, 268, 221, 268, 268])
?dijkstra
search: dijkstra dijkstra_shortest_paths
compute shortest paths using Dijkstra's algorithm.
d = dijkstra(A,u) computes the shortest path from vertex u to all nodes
reachable from vertex u using Dijkstra's algorithm for the problem.
The graph is given by the weighted sparse matrix A, where A(i,j) is
the distance between vertex i and j. In the output vector d,
the entry d[v] is the minimum distance between vertex u and vertex v.
A vertex w unreachable from u has d(w)=Inf.
pred is the predecessor tree to generate the actual shorest paths.
In the predecessor tree pred[v] is the vertex preceeding v in the
shortest path and pred[u]=0. Any unreachable vertex has pred[w]=0 as well.
If your network is unweighted, then use bfs instead.
# Find the minimum travel time between Los Angeles (LAX) and
# Rochester Minnesota (RST).
(A,xy,labels) = load_matrix_network_metadata("airports")
A = -A; # fix funny encoding of airport data
lax = 247; rst = 355
(d,pred) = dijkstra(A,lax)
@printf("Minimum time: %d",d[rst]); #Print the path
@printf("Path:")
u = rst
while(u != lax)
@printf("%s <-- ", labels[u])
u = pred[u];
if (u == lax)
@printf("%s", labels[lax])
end
end
typeof(ATL_paths)
Tuple{Vector{Float64}, Vector{Int64}}
ATL_paths[1][maxdegreeid]
0.0
maximum(ATL_paths[1])
3.0
@show stop1 = argmax(ATL_paths[1]) # 최대값의 인덱스를 반환
@show uairports[stop1];
stop1 = argmax(ATL_paths[1]) = 123 uairports[stop1] = "GST"
@show stop2 = ATL_paths[2][stop1]
@show uairports[stop2];
stop2 = (ATL_paths[2])[stop1] = 152 uairports[stop2] = "JNU"
@show stop3 = ATL_paths[2][stop2]
@show uairports[stop3];
stop3 = (ATL_paths[2])[stop2] = 259 uairports[stop3] = "SEA"
@show stop4 = ATL_paths[2][stop3]
@show uairports[stop4];
stop4 = (ATL_paths[2])[stop3] = 19 uairports[stop4] = "ATL"
그래프 생성 원리를 알기 위해 이번에는 하나의 그래프씩 출력해 보았다.
# 미국지도를 나타내주는 그래프
using VegaLite, VegaDatasets
us10m = dataset("us-10m")
airports = dataset("airports")
@vlplot(width = 800, height=500) +
@vlplot(
mark={
:geoshape,
fill="#eee",
stroke=:white
},
data={
values=us10m,
format={
type =:topojson,
feature=:states,
}
},
projection = {type =:albersUsa}
)
# 점을 뿌리는 그래프
@vlplot(
:circle,
data = airportsdf_subset,
projection={type =:albersUsa},
longitude="longitude:q",
latitude="latitude:q",
size={value=5},
color={value=:gray}
)
# 최단 경로를 선으로 나타내 주는 그래프
@vlplot(
:line,
data={
values=[
{airport=:ATL, order=1},
{airport=:SEA, order=2},
{airport=:JNU, order=3},
{airport=:GST, order=4}
]
},
transform=[{
lookup=:airport,
from={
data=airports,
key =:iata,
fields=["latitude", "longitude"]}
}],
projection={type=:albersUsa},
longitude="longitude:q",
latitude="latitude:q",
order={field=:order, type=:ordinal}
)
위의 그래프를 한 번에 그리면 아래와 같이 그려진다.
using VegaLite, VegaDatasets
us10m = dataset("us-10m")
airports = dataset("airports")
@vlplot(width =600, height=400) +
@vlplot(
mark={
:geoshape,
fill="#eee",
stroke=:white
},
data={
values=us10m,
format={
type =:topojson,
feature=:states,
}
},
projection = {type =:albersUsa}
) + # 미국지도를 나타내주는 그래프
@vlplot(
:circle,
data = airportsdf_subset,
projection={type =:albersUsa},
longitude="longitude:q",
latitude="latitude:q",
size={value=5},
color={value=:gray}
) + # 점을 뿌리는 그래프
@vlplot(
:line,
data={
values=[
{airport=:ATL, order=1},
{airport=:SEA, order=2},
{airport=:JNU, order=3},
{airport=:GST, order=4}
]
},
transform=[{
lookup=:airport,
from={
data=airports,
key =:iata,
fields=["latitude", "longitude"]}
}],
projection={type=:albersUsa},
longitude="longitude:q",
latitude="latitude:q",
order={field=:order, type=:ordinal}
) # 최단 경로를 선으로 나타내 주는 그래프
nodeid = argmin(degrees)
4
nodeid = argmin(degrees)
@show uairports[nodeid]
d = dijkstra(A, nodeid)
argmax(d[1]), uairports[argmax(d[1])]
uairports[nodeid] = "ABY"
(123, "GST")
function find_path(d, id)
shortestpath = zeros(Int, 1+Int.(d[1][id]))
shortestpath[1] = id
for i = 2:length(shortestpath)
shortestpath[i] = d[2][shortestpath[i-1]]
end
return shortestpath
end
find_path (generic function with 1 method)
p = find_path(d, 123)
5-element Vector{Int64}:
123
152
17
19
4
uairports[p]
5-element Vector{String}:
"GST"
"JNU"
"ANC"
"ATL"
"ABY"
Spanning Tree: Spanning Tree는 그래프에 있는 n개의 정점을 정확히 (n-1)개의 간선으로 연결
Spanning Tree 중에서 사용된 간선들의 가중치 합이 최소인 트리
Prim MST 알고리즘 시작 정점에서부터 출발하여 신장트리 집합을 단계적으로 확장 해나가는 방법으로 정점 선택을 기반으로 하는 알고리즘이며 이전 단계에서 만들어진 신장 트리를 확장하는 방법이다.
시작 단계에서는 시작 정점만이 MST(최소 비용 신장 트리) 집합에 포함된다. 앞 단계에서 만들어진 MST 집합에 인접한 정점들 중에서 최소 간선으로 연결된 정점을 선택하여 트리를 확장한다. 즉, 가장 낮은 가중치를 먼저 선택한다. 위의 과정을 트리가 (N-1)개의 간선을 가질 때까지 반복한다.
https://gmlwjd9405.github.io/2018/08/30/algorithm-prim-mst.html
https://gmlwjd9405.github.io/2018/08/30/algorithm-prim-mst.html
https://gmlwjd9405.github.io/2018/08/30/algorithm-prim-mst.html
?mst_prim
search: mst_prim mst_prim_matrix
compute a minimum spanning tree with Prim's algorithm.
T = mst_prim_matrix(A) computes a minimum spanning tree T using Prim's algorithm
for the spanning tree of a graph with non-negative edge weights.
T = mst_prim_matrix(A,false) produces an MST for just the component at A containing
vertex 1. T = mst_prim_matrix(A,0,u) produces the MST for the component
containing vertex u.
(ti,tj,tv,nverts) = mst_prim(A) returns the edges from the matrix and does not
convert to a sparse matrix structure. This saves a bit of work and is
required when there are 0 edge weights.
A = load_matrix_network("airports")
A = -A #convert to travel time
A = max.(A,A')
A = sparse(A)
(ti,tj,tv,nverts) = mst_prim(A)
ti, tj, tv, nverts = mst_prim(A)
([2, 3, 4, 5, 6, 7, 8, 9, 10, 11 … 296, 297, 298, 299, 300, 301, 302, 303, 304, 305], [66, 286, 19, 151, 82, 197, 19, 17, 17, 19 … 19, 182, 231, 268, 141, 54, 268, 156, 268, 101], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1 … 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 305)
# 데이터 프레임으로 변환
df_edges = DataFrame(:ei => uairports[ti], :ej => uairports[tj])
| ei | ej | |
|---|---|---|
| String | String | |
| 1 | ABI | CMI |
| 2 | ABQ | TPA |
| 3 | ABY | ATL |
| 4 | ACK | JFK |
| 5 | ACT | DFW |
| 6 | ACV | MRY |
| 7 | ACY | ATL |
| 8 | ADK | ANC |
| 9 | ADQ | ANC |
| 10 | AEX | ATL |
| 11 | AGS | ATL |
| 12 | AKN | ANC |
| 13 | ALB | PHL |
| 14 | ALO | MSP |
| 15 | AMA | TUL |
| 16 | ANC | OGG |
| 17 | ASE | TUL |
| 18 | ATL | ABE |
| 19 | ATW | PHL |
| 20 | AUS | PHL |
| 21 | AVL | ATL |
| 22 | AVP | PHL |
| 23 | AZO | XNA |
| 24 | BDL | PHL |
| 25 | BET | ANC |
| 26 | BFL | LAS |
| 27 | BGM | DTW |
| 28 | BGR | ATL |
| 29 | BHM | ABE |
| 30 | BIL | ORD |
| ⋮ | ⋮ | ⋮ |
@vlplot(width=600, height=400) +
@vlplot(
mark={
:geoshape,
fill="#eee",
stroke=:white
},
data={
values=us10m,
format={
type=:topojson,
feature=:states
}
},
projection={type=:albersUsa},
) + # 미국 지도를 나타내는 플랏
@vlplot(
:circle,
data=airportsdf_subset,
projection={type=:albersUsa},
longitude="longitude:q",
latitude="latitude:q",
size={value=20},
color={value=:gray}
) + # 공항 노드를 써클로 나타내는 플랏
@vlplot(
:rule,
data=df_edges, #data=flightsairport,
transform=[
{
lookup=:ei,
from={
data=airportsdf_subset,
key=:iata,
fields=["latitude", "longitude"]
},
as=["originx", "originy"]
},
{
lookup=:ej,
from={
data=airportsdf_subset,
key=:iata,
fields=["latitude", "longitude"]
},
as=["destx", "desty"]
}
],
projection={type=:albersUsa},
longitude="originy:q",
latitude="originx:q",
longitude2="desty:q",
latitude2="destx:q"
) # 최소스패닝 트리를 그리는 플랏
https://ko.wikipedia.org/wiki/%ED%8E%98%EC%9D%B4%EC%A7%80%EB%9E%AD%ED%81%AC
?MatrixNetworks.pagerank
pagerank¶PageRank is the stationary distribution of a Markov chain defined as follows. The behavior of the chain at state i is:
The solution satisfies a linear system of equations. This function will solve that linear system to a 1-norm error of tol where tol is chosen by default to be the machine precision.
The solution is always a probability distribution.
x = pagerank(A::SparseMatrixCSC{V,Int},alpha::Float64)x = pagerank(A::MatrixNetwork{V},alpha::Float64)x = pagerank(A,alpha,eps::Float64) specifies solution tolerance too.A: The sparse matrix or matrix network that you wish to use to compute PageRank. In the case of a sparse matrix, it must have non-negative values and the values will impact the computation as we will compute a stochastic normalization as part of the algorithm.alpha: the teleportation parameter given above.tol: the tolerance, the default choice is machine precision for floating point, which is more than enough for almost all applications.pagerank(A,alpha)
# return the standard PageRank vector
# with uniform teleportation and alpha=0.85
# computed to machine precision
v = MatrixNetworks.pagerank(A, 0.85)
305-element Vector{Float64}:
0.0020923348051623326
0.0009436892155004145
0.006119901015186467
0.0006607117011710984
0.0008603728281911192
0.0006445871130434956
0.0015924822798577083
0.0009129198498983633
0.0007673129766872152
0.0007673129766872152
0.0009549149040574479
0.0010558743385657198
0.0013344573507603737
⋮
0.0006445871130434956
0.0027593013724303977
0.0006607117011710984
0.001475388547065869
0.0017187137444043738
0.0006494983427307581
0.0043517606900941854
0.0016096866743709605
0.0006494983427307581
0.0015555904158222685
0.0007866027711239782
0.0008103673010237374
sum(v)
0.9999999999999999
insertcols!(airportsdf_subset, 7, :pagerank_value => v)
| iata | name | city | state | country | |
|---|---|---|---|---|---|
| String | String | String | String | String | |
| 1 | ABE | Lehigh Valley International | Allentown | PA | USA |
| 2 | ABI | Abilene Regional | Abilene | TX | USA |
| 3 | ABQ | Albuquerque International | Albuquerque | NM | USA |
| 4 | ABY | Southwest Georgia Regional | Albany | GA | USA |
| 5 | ACK | Nantucket Memorial | Nantucket | MA | USA |
| 6 | ACT | Waco Regional | Waco | TX | USA |
| 7 | ACV | Arcata | Arcata/Eureka | CA | USA |
| 8 | ACY | Atlantic City International | Atlantic City | NJ | USA |
| 9 | ADK | Adak | Adak | AK | USA |
| 10 | ADQ | Kodiak | Kodiak | AK | USA |
| 11 | AEX | Alexandria International | Alexandria | LA | USA |
| 12 | AGS | Bush | Augusta | GA | USA |
| 13 | AKN | King Salmon | King Salmon | AK | USA |
| 14 | ALB | Albany Cty | Albany | NY | USA |
| 15 | ALO | Waterloo Municipal | Waterloo | IA | USA |
| 16 | AMA | Amarillo International | Amarillo | TX | USA |
| 17 | ANC | Ted Stevens Anchorage International | Anchorage | AK | USA |
| 18 | ASE | Aspen-Pitkin Co/Sardy | Aspen | CO | USA |
| 19 | ATL | William B Hartsfield-Atlanta Intl | Atlanta | GA | USA |
| 20 | ATW | Outagamie County Regional | Appleton | WI | USA |
| 21 | AUS | Austin-Bergstrom International | Austin | TX | USA |
| 22 | AVL | Asheville Regional | Asheville | NC | USA |
| 23 | AVP | Wilkes-Barre/Scranton Intl | Wilkes-Barre/Scranton | PA | USA |
| 24 | AZO | Kalamazoo County | Kalamazoo | MI | USA |
| 25 | BDL | Bradley International | Windsor Locks | CT | USA |
| 26 | BET | Bethel | Bethel | AK | USA |
| 27 | BFL | Meadows | Bakersfield | CA | USA |
| 28 | BGM | Binghamton Regional | Binghamton | NY | USA |
| 29 | BGR | Bangor International | Bangor | ME | USA |
| 30 | BHM | Birmingham International | Birmingham | AL | USA |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
names(airportsdf_subset)
8-element Vector{String}:
"iata"
"name"
"city"
"state"
"country"
"latitude"
"pagerank_value"
"longitude"
@vlplot(width=500, height=300) +
@vlplot(
mark={
:geoshape,
fill="#eee",
stroke=:white
},
data={
values=us10m,
format={
type=:topojson,
feature=:states
}
},
projection={type=:albersUsa},
) # 미국 지도를 나타내 주는 그래프
@vlplot(
:circle,
data=airportsdf_subset,
projection={type=:albersUsa},
longitude="longitude:q",
latitude="latitude:q",
size="pagerank_value:q",
color={value=:steelblue}
) # 페이지 랭크 값의 크기만큼 버블을 키워주는 그래프
위의 두 그래프를 합치면
@vlplot(width=500, height=300) +
@vlplot(
mark={
:geoshape,
fill="#eee",
stroke=:white
},
data={
values=us10m,
format={
type=:topojson,
feature=:states
}
},
projection={type=:albersUsa},
) + # 미국 지도를 나타내 주는 그래프
@vlplot(
:circle,
data=airportsdf_subset,
projection={type=:albersUsa},
longitude="longitude:q",
latitude="latitude:q",
size="pagerank_value:q",
color={value=:steelblue}
) # 페이지 랭크 값의 크기만큼 버블을 키워주는 그래프
?clustercoeffs
search: clustercoeffs dirclustercoeffs global_clustering_coefficient
compute undirected clustering coefficients for a graph. clustercoeffs(A) computes a
normalized, weighted clustering coefficients from a graph represented by a symmetric
adjacency matrix A. clustercoeffs(A,weighted,normalized), with weighted and normalized
boolean values indicating whether the computation has to be weighted and/or normalized.
If weighted and normalized are not specified, they are understood as true
A = load_matrix_network("clique-10")
cc = clustercoeffs(MatrixNetwork(A))
cc = clustercoeffs(A)
cc[findall(cc.<=eps())] .= 0
cc
305-element Vector{Float64}:
0.8333333333333334
1.0
0.6475609756097561
0.0
0.6666666666666666
0.0
0.5333333333333333
1.0
0.0
0.0
1.0
1.0
1.0
⋮
0.0
0.9779411764705882
0.0
1.0
0.0
0.0
0.5897435897435898
0.0
0.0
0.6666666666666666
1.0
1.0
insertcols!(airportsdf_subset,7,:ccvalues=>cc)
| iata | name | city | state | country | |
|---|---|---|---|---|---|
| String | String | String | String | String | |
| 1 | ABE | Lehigh Valley International | Allentown | PA | USA |
| 2 | ABI | Abilene Regional | Abilene | TX | USA |
| 3 | ABQ | Albuquerque International | Albuquerque | NM | USA |
| 4 | ABY | Southwest Georgia Regional | Albany | GA | USA |
| 5 | ACK | Nantucket Memorial | Nantucket | MA | USA |
| 6 | ACT | Waco Regional | Waco | TX | USA |
| 7 | ACV | Arcata | Arcata/Eureka | CA | USA |
| 8 | ACY | Atlantic City International | Atlantic City | NJ | USA |
| 9 | ADK | Adak | Adak | AK | USA |
| 10 | ADQ | Kodiak | Kodiak | AK | USA |
| 11 | AEX | Alexandria International | Alexandria | LA | USA |
| 12 | AGS | Bush | Augusta | GA | USA |
| 13 | AKN | King Salmon | King Salmon | AK | USA |
| 14 | ALB | Albany Cty | Albany | NY | USA |
| 15 | ALO | Waterloo Municipal | Waterloo | IA | USA |
| 16 | AMA | Amarillo International | Amarillo | TX | USA |
| 17 | ANC | Ted Stevens Anchorage International | Anchorage | AK | USA |
| 18 | ASE | Aspen-Pitkin Co/Sardy | Aspen | CO | USA |
| 19 | ATL | William B Hartsfield-Atlanta Intl | Atlanta | GA | USA |
| 20 | ATW | Outagamie County Regional | Appleton | WI | USA |
| 21 | AUS | Austin-Bergstrom International | Austin | TX | USA |
| 22 | AVL | Asheville Regional | Asheville | NC | USA |
| 23 | AVP | Wilkes-Barre/Scranton Intl | Wilkes-Barre/Scranton | PA | USA |
| 24 | AZO | Kalamazoo County | Kalamazoo | MI | USA |
| 25 | BDL | Bradley International | Windsor Locks | CT | USA |
| 26 | BET | Bethel | Bethel | AK | USA |
| 27 | BFL | Meadows | Bakersfield | CA | USA |
| 28 | BGM | Binghamton Regional | Binghamton | NY | USA |
| 29 | BGR | Bangor International | Bangor | ME | USA |
| 30 | BHM | Birmingham International | Birmingham | AL | USA |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
@vlplot(width=500, height=300) +
@vlplot(
mark={
:geoshape,
fill="#eee",
stroke=:white
},
data={
values=us10m,
format={
type=:topojson,
feature=:states
}
},
projection={type=:albersUsa},
) +
@vlplot(
:circle,
data=airportsdf_subset,
projection={type=:albersUsa},
longitude="longitude:q",
latitude="latitude:q",
size="ccvalues:q",
color={value=:gray}
) # size를 clustering coefficients를 사용함
After finishing this notebook, you should be able to:
(강의 자료 원문 내용)
We ran PageRank (which is a common algorithm to rank nodes in a network), and visualized the PageRank values on the US airports. The results agreed with the hypothesis that known hubs have higher PageRank value. Look at Atlanta, it's actually the biggest.
